# Prepare complete analysis data set based on all states
# By Dom Valentino and Chris Kenny
# libs --------------------------------------------------------------------
library(plyr) # rbind.fill()
library(tidyverse) # select()
library(haven) # read_dta()
library(datasets)

# set wd to source file location
# The relative "../data/..." paths used in this script are resolved against
# the working directory. rstudioapi can only report the active document from
# inside a running RStudio session, so the call is guarded: elsewhere (for
# example under Rscript) the working directory is left untouched and a message
# states which directory will be used.
local({
  in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()
  script_path <- if (in_rstudio) {
    rstudioapi::getActiveDocumentContext()$path
  } else {
    ""
  }
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  } else {
    # An empty path means no saved source document could be identified.
    message(
      "Could not determine the script location; keeping working directory ",
      getwd(), ". Run this script from its own folder."
    )
  }
})

# table of contents -------------------------------------------------------
# data: read data
# combine: rbind all data together

# data --------------------------------------------------------------------
# Returns stored in returns_pre.Rds: drop the fips column and add an all-NA
# county_id column (county_id is recomputed after the sources are stacked).
final_pre <- readRDS("../data/returns_pre/returns_pre.Rds")
final_pre <- select(final_pre, -fips)
final_pre <- mutate(final_pre, county_id = NA)

# 2020 analysis file: keep only the columns shared with the other sources and
# strip the first " City" / " city" occurrence from each county name.
# NOTE(review): the pattern is unanchored and str_replace() only replaces the
# first match, so a name such as "James City County" would become
# "James County" -- confirm this is intended.
final_2020 <- read.csv("../data/analysis/analysis2020.csv")
final_2020 <- select(
  final_2020,
  state, county, county_id, year, treat,
  dem_share_gov, dem_share_pres, dem_share_sen
)
final_2020 <- mutate(
  final_2020,
  county = str_replace(county, " City| city", "")
)

# Stata analysis file, restricted to the same set of columns.
thompson <- read_dta("../data/modified data/analysis.dta")
thompson <- select(
  thompson,
  state, county, county_id, year, treat,
  dem_share_gov, dem_share_pres, dem_share_sen
)

# Remove every object in the current environment except the three inputs.
rm(list = setdiff(ls(), c("final_pre", "final_2020", "thompson")))

# combine -----------------------------------------------------------------
# Stack the three sources. rbind.fill() keeps the union of their columns and
# fills any column a source lacks with NA.
final <- rbind.fill(rbind.fill(final_pre, final_2020), thompson)

# Squared year term.
# NOTE(review): as.numeric() returns level codes when applied to a factor --
# confirm year is never a factor in the inputs.
final <- mutate(final, year2 = as.numeric(year)^2)

# county_id: one integer per (state, county) group, overwriting the
# county_id values carried in from the inputs.
final <- group_by(final, state, county)
grpid <- group_indices(final)
final <- mutate(ungroup(final), county_id = grpid)

# state_year_id: one integer per (state, year) group.
final <- group_by(final, state, year)
grpid <- group_indices(final)
final <- mutate(ungroup(final), state_year_id = grpid)

# Lookup table from state abbreviation to division name, built from the
# state.abb and state.division vectors of the datasets package.
divs <- tibble(state = state.abb, division = as.character(state.division))

# Attach the division to every row; states absent from the lookup get NA.
final <- left_join(final, divs, by = "state")

# Drop DC. filter() also drops rows where the comparison is NA, i.e. rows
# with a missing state.
final <- filter(final, state != "DC")

# div_year_id: one integer per (division, year) group.
final <- group_by(final, division, year)
grpid <- group_indices(final)
final <- mutate(ungroup(final), div_year_id = grpid)
# Number the rows within each (state, county, year) cell. row_number() is used
# instead of 1:n() because 1:n() evaluates to c(1, 0) when a group is empty;
# for every non-empty group the two give the same 1..n sequence.
final <- final %>%
  group_by(state, county, year) %>%
  mutate(id = row_number()) %>%
  group_by(state, year, id)

# election_id: one integer per (state, year, id) group. year is stored as
# numeric in the output.
# NOTE(review): as.numeric() returns level codes when applied to a factor --
# confirm year is never a factor at this point.
grpid <- group_indices(final)
final <- final %>%
  ungroup() %>%
  mutate(election_id = grpid, year = as.numeric(year))

# Write the combined data set in both CSV and Stata formats.
write_csv(final, "../data/analysis/analysis_all_states.csv")
write_dta(final, "../data/analysis/analysis_all_states.dta")
